# https://github.com/oomol-lab/pdf-craft?tab=readme-ov-file
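# 1. An LLM could be asked to explain what each image means, and that explanation used as the image's description.
# 2. Readers could mark difficult passages while reading, and the LLM would then automatically look up answers and explain them in the background.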
from pdf_craft import PDFPageExtractor, MarkDownWriter

import os
import sys
import onnxruntime

# Command-line entry point: expects exactly one argument, the base name of a
# PDF (without the ".pdf" extension). The input is read from
# data/<name>.pdf and the extracted Markdown is written to data/<name>.md.
if len(sys.argv) != 2:
    # Usage errors belong on stderr so they do not mix with regular output.
    print("Usage: python script.py <name_of_pdf>", file=sys.stderr)
    sys.exit(1)

book_name = sys.argv[1]
pdf_path = os.path.join("data", book_name + ".pdf")
markdown_path = os.path.join("data", book_name + ".md")

# Fail fast on a missing input: otherwise the problem only surfaces after the
# extractor has been constructed and the Markdown writer has been opened
# (which may already have created or overwritten the output file).
if not os.path.isfile(pdf_path):
    print(f"PDF not found: {pdf_path}", file=sys.stderr)
    sys.exit(1)

extractor = PDFPageExtractor(
    device="cuda:0",  # set to "cpu" to run without a GPU; CUDA devices use the "cuda:N" form
    model_dir_path="model",  # folder where the AI models are downloaded and installed
)

# "images" is the assets location handed to the writer; "utf-8" is the
# encoding of the Markdown output.
with MarkDownWriter(markdown_path, "images", "utf-8") as md:
    for block in extractor.extract(pdf=pdf_path):
        md.write(block)